/*******************************************************************************

Ryan Hill: ryan.hill@kellogg.northwestern.edu
Carolyn Stein: carolyn_stein@berkeley.edu
Last modified: December 2024

Inputs: 	clean_summary.dta
			clean_citation.dta
			clean_refine.dta
			clean_collection.dta
			clean_entity_values.dta
			clean_pubmed.dta
			clean_jifs.dta
			clean_affiliations_first_author.dta
			clean_validation.dta
			clean_drugbank.dta

Outputs: 	pdb_full.dta

Purpose: 	Merges together all of the different clean datasets

*******************************************************************************/

* Build pdb_full.dta: load the summary file, then attach each clean dataset in
* turn. Every merge uses keep(master match), so rows that exist only in the
* using file are discarded, and nogenerate, so no _merge variable is created.
use "${data_clean}clean_summary.dta", clear

	* Citations, keyed 1:1 on structureId.
	* Using-only rows: citations matching PDB entries that have been deleted.
	merge 1:1 structureId using "${data_clean}clean_citation.dta", ///
		keep(master match) nogenerate

	* Refinement details, keyed 1:1 on structureId.
	* Using-only rows: refinement details matching deleted PDB entries.
	merge 1:1 structureId using "${data_clean}clean_refine.dta", ///
		keep(master match) nogenerate

	* Collection details, keyed 1:1 on structureId.
	* Using-only rows: collection details matching deleted PDB entries.
	merge 1:1 structureId using "${data_clean}clean_collection.dta", ///
		keep(master match) nogenerate

	* Entity values, 1:m on structureId: one master row may match several
	* using rows, which is why the remaining structureId merges are m:1.
	* Using-only rows: entity details matching deleted PDB entries.
	merge 1:m structureId using "${data_clean}clean_entity_values.dta", ///
		keep(master match) nogenerate

	* PubMed details, m:1 on pubmedId.
	* Using-only rows: pubmed records with no PDB entry (later download date).
	merge m:1 pubmedId using "${data_clean}clean_pubmed.dta", ///
		keep(master match) nogenerate

	* Validation reports, m:1 on structureId.
	* Using-only rows: reports with no PDB entry (later download date).
	merge m:1 structureId using "${data_clean}clean_validation.dta", ///
		keep(master match) nogenerate

	* DrugBank details, m:1 on structureId.
	* Using-only rows: drugbank records with no PDB entry (later download date).
	merge m:1 structureId using "${data_clean}clean_drugbank.dta", ///
		keep(master match) nogenerate

	* Apply a common 50-character display format to the listed string variables.
	format %50s structureTitle structureAuthor* paperAuthor* title

* Write the merged dataset, overwriting any existing copy.
save "${data_built}pdb_full.dta", replace
